capture log close
log using "${path_log}/01_read_external.log", replace

*** Read in county to local labor market crosswalk
import delimited "${path_orig}\external\arbeitsmarktregionen-2020.csv", clear 

ren (kreise2020kennziffer arbeitsmarktregionen2020kennziff) (county_id llm_id)
replace county_id = county_id / 1000 // Convert municipality ID to county ID

keep county_id llm_id

save "${path_data}\county_llm", replace

*** Read Google trends data
import delimited "${path_orig}\external\google_trends.csv", clear 

g year = substr(week,1,4)
g month = substr(week,6,2) 

destring year month, replace

g date = ym(year,month)

*** Aggregate to monthly level 
collapse (mean) strompreis, by(date)

format %tmMon_CCYY date 

*** Renormalize maximum to 100
qui sum strompreis
replace strompreis = strompreis / `r(max)' * 100
  
*** Appendix Figure A.1
twoway connected strompreis date, color(blue) xtitle("") ytitle("Google search trend") graphregion(color(white)) bgcolor(white) plotregion(fcolor(white)) xlabel(708(3)747, angle(90)) ylabel(0(20)100) xline(740 747, lpattern(dash) lcolor(black))

graph export "${path_export_graphs}\appendix_figure_a1.pdf", replace

*** Read CPI sub-index data
import delimited "${path_orig}\external\prices_crosswalk.csv", varnames(1) clear

reshape long price, i(coicop) j(time) string

g year = substr(time,-4,4)
replace price = subinstr(price,",",".",.)
replace wgt = subinstr(wgt,",",".",.)
destring price wgt year, replace

g month = 1 if strpos(time,"januar") > 0 
replace month = 2 if strpos(time,"februar") > 0
replace month = 3 if strpos(time,"märz") > 0
replace month = 4 if strpos(time,"april") > 0
replace month = 5 if strpos(time,"mai") > 0
replace month = 6 if strpos(time,"juni") > 0
replace month = 7 if strpos(time,"juli") > 0
replace month = 8 if strpos(time,"august") > 0
replace month = 9 if strpos(time,"september") > 0
replace month = 10 if strpos(time,"oktober") > 0
replace month = 11 if strpos(time,"november") > 0
replace month = 12 if strpos(time,"dezember") > 0

gegen id = group(coicop)
g time_c = ym(year,month)
xtset id time_c, monthly

gsort id time_c 
g inflation_f12_ = F12.price / price - 1 // Inflation next twelve months
g inflation_l12_ = price / L12.price - 1 // Inflation last twelve months

g wave = 21 if year == 2021 & month == 9
replace wave = 22 if year == 2021 & month == 10
replace wave = 23 if year == 2021 & month == 11
replace wave = 24 if year == 2021 & month == 12
replace wave = 25 if year == 2022 & month == 1
replace wave = 26 if year == 2022 & month == 2
replace wave = 27 if year == 2022 & month == 3
replace wave = 28 if year == 2022 & month == 4

keep if inrange(wave,21,28)

*** Collapse to consumption categories in the BOP-HH
collapse (mean) inflation_f12_ inflation_l12_ [w=wgt], by(category wave)

reshape wide inflation_f12_ inflation_l12_, i(wave) j(category) string

ren (inflation_f12_a inflation_f12_b inflation_f12_c inflation_f12_d inflation_f12_e inflation_f12_f inflation_f12_g inflation_f12_h) (inflation_f12_durables inflation_f12_non_durables inflation_f12_clothes_shoes inflation_f12_leisure inflation_f12_mobility inflation_f12_services inflation_f12_vacation inflation_f12_housing)

ren (inflation_l12_a inflation_l12_b inflation_l12_c inflation_l12_d inflation_l12_e inflation_l12_f inflation_l12_g inflation_l12_h) (inflation_l12_durables inflation_l12_non_durables inflation_l12_clothes_shoes inflation_l12_leisure inflation_l12_mobility inflation_l12_services inflation_l12_vacation inflation_l12_housing)

save "${path_data}\cpi_subindices", replace

*** Load professional forecast data
import delimited "${path_orig}\external\consensus_economics.csv", clear 

replace consensus_forecast = subinstr(consensus_forecast,",",".",.)
destring consensus_forecast, replace

gsort year month 
g time = ym(year,month)

*** Interpolate the missing values
ipolate consensus_forecast time, gen(consensus_forecast_int)

preserve 

g wave = 21 if year == 2021 & mon == 9
replace wave = 22 if year == 2021 & mon == 10
replace wave = 23 if year == 2021 & mon == 11
replace wave = 24 if year == 2021 & mon == 12
replace wave = 25 if year == 2022 & mon == 1
replace wave = 26 if year == 2022 & mon == 2
replace wave = 27 if year == 2022 & mon == 3
replace wave = 28 if year == 2022 & mon == 4 

keep if inrange(wave,21,28)

keep wave consensus_forecast_int

save "${path_data}\forecast_hh", replace 

restore 

g wave = 6 if year == 2021 & mon == 7
replace wave = 7 if year == 2021 & mon == 8
replace wave = 8 if year == 2021 & mon == 9
replace wave = 9 if year == 2021 & mon == 10
replace wave = 10 if year == 2021 & mon == 11
replace wave = 11 if year == 2021 & mon == 12
replace wave = 12 if year == 2022 & mon == 1
replace wave = 13 if year == 2022 & mon == 2
replace wave = 14 if year == 2022 & mon == 3

keep if inrange(wave,6,14)

keep wave consensus_forecast_int

save "${path_data}\forecast_f", replace 

*** Load inflation data 
import delimited "${path_orig}\external\cpi.csv", clear 

replace index = subinstr(index,",",".",.)
destring index, replace

g time = ym(year,mon)
 
tsset time, monthly
g cpi_l12m = (index / L12.index - 1) * 100 // Inflation last twelve months
g cpi_f12m = F12.cpi_l12m // Inflation next twelve months

preserve 

g wave = 21 if year == 2021 & month == 9
replace wave = 22 if year == 2021 & month == 10
replace wave = 23 if year == 2021 & month == 11
replace wave = 24 if year == 2021 & month == 12
replace wave = 25 if year == 2022 & month == 1
replace wave = 26 if year == 2022 & month == 2
replace wave = 27 if year == 2022 & month == 3
replace wave = 28 if year == 2022 & month == 4

keep if inrange(wave,21,28)

keep wave cpi_l12m cpi_f12m

save "${path_data}\cpi_hh", replace

restore 

g wave = 6 if year == 2021 & month == 7
replace wave = 7 if year == 2021 & month == 8
replace wave = 8 if year == 2021 & month == 9
replace wave = 9 if year == 2021 & month == 10
replace wave = 10 if year == 2021 & month == 11
replace wave = 11 if year == 2021 & month == 12
replace wave = 12 if year == 2022 & month == 1
replace wave = 13 if year == 2022 & month == 2
replace wave = 14 if year == 2022 & month == 3 

keep if inrange(wave,6,14)

keep wave cpi_l12m cpi_f12m

save "${path_data}\cpi_f", replace

*** Read Verivox data
import excel "${path_orig}\external\verivox.xlsx", sheet("Tabelle1") firstrow clear

g time = ym(year,mon)
format %tmMon_CCYY time 

*** Figure 1b
twoway connected price_hh time, color(blue) xlabel(708(3)747, angle(90)) xtitle("") ytitle("") xline(740 747, lpattern(dash) lcolor(black)) legend(on label(1 "cents per kWh") order(1) position(6)) graphregion(color(white)) bgcolor(white) plotregion(fcolor(white))

graph export "${path_export_graphs}\figure_1b.pdf", replace

*** Read electricity cpi data
import delimited "${path_orig}\external\cpi_elec.csv", rowrange(:42) clear

replace index_elec = subinstr(index_elec,",",".",.)
destring index_elec, replace

replace month = lower(month)

g mon = 1 if month == "januar"
replace mon = 2 if month == "februar"
replace mon = 3 if month == "märz"
replace mon = 4 if month == "april"
replace mon = 5 if month == "mai"
replace mon = 6 if month == "juni"
replace mon = 7 if month == "juli"
replace mon = 8 if month == "august"
replace mon = 9 if month == "september"
replace mon = 10 if month == "oktober"
replace mon = 11 if month == "november"
replace mon = 12 if month == "dezember"

g time = ym(year,mon)
format %tmMon_CCYY time 
tsset time 

g cpi_elec_monthly = (index_elec / L1.index_elec - 1) * 100 
drop if mi(cpi_elec_monthly)
                      
*** Appendix Figure A.2
twoway connected cpi_elec_monthly time, color(blue) xline(708 720 732 744, lcolor(black) lpattern(dash)) xlabel(708(12)744) xtitle("") ytitle("month-to-month electricty CPI") graphregion(color(white)) plotregion(color(white)) bgcolor(white)

graph export "${path_export_graphs}\appendix_figure_a2.pdf", replace 

*** Read electricity price data
import excel "${path_orig}\external\electricity_wholesale_2021_2024.xlsx", sheet("Großhandelspreise") cellrange(A10:C52) firstrow clear
 
g year = substr(Datumvon,7,4)
g month = substr(Datumvon,4,2)
destring year month, replace

ren DeutschlandLuxemburgMWh price_elec

keep year month price_elec

tempfile price_elec
save `price_elec'

*** Read natural gas price data
import delimited "${path_orig}\external\PNGASEUUSDM_2021_2024.csv", clear 

ren pngaseuusdm price_gas 

g year = substr(date,1,4)
g month = substr(date,6,2) 
 
destring year month price_gas, replace

keep year month price_gas

tempfile price_gas
save `price_gas'  

*** Read oil price data
import delimited "${path_orig}\external\DCOILBRENTEU_2021_2024.csv", clear 

ren dcoilbrenteu price_oil

g year = substr(date,1,4)
g month = substr(date,6,2)
 
destring year month price_oil, replace

keep year month price_oil

tempfile price_oil
save `price_oil'   

*** Read coal price data
import delimited "${path_orig}\external\PCOALAUUSDM_2021_2024.csv", clear

ren pcoalauusdm price_coal 

g year = substr(date,1,4)
g month = substr(date,6,2)

destring year month price_coal, replace

keep year month price_coal

tempfile price_coal
save `price_coal'   

*** Merge all market price data 
use `price_elec', clear

merge 1:1 year month using `price_gas'  
keep if _merge == 3
drop _merge
merge 1:1 year month using `price_oil'   
keep if _merge == 3
drop _merge 
merge 1:1 year month using `price_coal'
keep if _merge == 3
drop _merge 

preserve
 
g time = ym(year,month)
format %tmMon_CCYY time 

*** Normalize prices to January 2021 for the descriptive graph
foreach v of varlist price_* {

	qui sum `v' if year == 2021 & month == 1
	g `v'_norm = `v' / `r(mean)'
	
} // v
         
*** Figure 2b
twoway (connected price_gas_norm time if year == 2021 | (year == 2022 & inrange(month,1,3)), color(blue) msymbol(O) lpattern(solid)) || ///
	   (connected price_elec_norm time if year == 2021 | (year == 2022 & inrange(month,1,3)), color(red) msymbol(D) lpattern(dash)) || ///
	   (connected price_oil_norm time if year == 2021 | (year == 2022 & inrange(month,1,3)), color(purple) msymbol(S) lpattern(dash_dot)) || ///
	   (connected price_coal_norm time if year == 2021 | (year == 2022 & inrange(month,1,3)), color(black) msymbol(T) lpattern(dot)), ///
	   xlabel(732(1)746, angle(90)) xtitle("") ytitle("price increase since January 2021") ylabel(,format(%14.0f)) xline(738 746, lpattern(dash) lcolor(black)) legend(label(1 "natural gas") label(2 "electricity") label(3 "oil") label(4 "coal") order(1 2 3 4) row(1) position(6)) graphregion(color(white)) bgcolor(white) plotregion(fcolor(white))  
	 						
graph export "${path_export_graphs}\figure_2b.pdf", replace 

restore

*** Normalize prices to December 2021 for the shift-share analysis
foreach p of varlist price_* {
	
	qui sum `p' if year == 2021 & month == 12
	replace `p' = `p' / `r(mean)' - 1
	
} // p

replace year = year - 2000 // Make consistent with BOP-F year format

save "${path_data}\marketprices.dta", replace

log close